****************************************************************************************************************
* This do file prepares data for summary statistic for product(hs 8 digit) and firm.
* 
* Input files: "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_step1"
*
* Output files: "Data/Coded data/Customs/summstats_product"
*               "Data/Coded data/Customs/summstats_firm"
*
* Last modified by: Yukun Wang
* Date:02/26/2017
****************************************************************************************************************

clear all
set more off
set scheme s1color

***Set directory
capture cd "/Users/Dropbox/Chinese food exports"


       *********************
       *   Product Level   *
       *********************
	   
***import data
use "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_step1",clear
keep if food==1
drop if year>=2008

bys hs_id year: egen sum_value=sum(value)
bys hs_id year: egen sum_quantity=sum(quantity)

preserve
use  "Data/country_codebook/oecd_high_income_country_list.dta", clear
replace oecdhighincome=0 if oecdhighincome==.
duplicates drop origin_id, force
tempfile oecd_list
save `oecd_list', replace
restore

merge m:1 origin_id using `oecd_list'   //mark oecd&high income countries
tab _m
replace oecdhighincome=0 if _m==1 //missing origin id
drop if _m==2
drop _m

bys hs_id year: egen sum_oecd_value=sum(value) if oecdhighincome==1
gen oecd_share=sum_oecd_value/sum_value

drop type_id2
collapse (mean) sum_value sum_quantity oecd_share dairy,by(hs_id year)

g price=sum_value/sum_quantity

foreach v of var oecd_share {
	replace `v'=0 if `v'==.
	}
	

ren sum_value value

g active=1

fillin hs_id year
replace value=0 if _fillin==1
g lvalue=log(value+0.00001)
replace active=0 if _fillin==1

bys hs_id: egen avg_size=mean(value)
bys hs_id: egen avg_lnsize=mean(lvalue)
bys hs_id: egen avg_exp=sum(active)
bys hs_id: egen avg_oecd_share=mean(oecd_share) if active==1
bys hs_id: egen avg_price=mean(price) if active==1

collapse (mean)avg_* dairy, by(hs_id)

merge m:1 hs_id using "Data/inspection_lists/contaminated_productlist_customs.dta"   //mark contaminated products
tab _m
gen affected_products=.
replace affected_products=1 if _m==3
replace affected_products=0 if _m==1
drop if _m==2
drop _m
drop product

save "Data/Coded data/Customs/summstats_product.dta",replace


       *********************
       *     Firm Level    *
       *********************

***import data
use "Data/Coded data/Customs/old/customs_yearly_2000_2013_firmproduct_level_step1",clear
keep if food==1
drop if year>=2008
***identify dairy firms and non-dairy food firms
bys party_id: egen dairyfirm=max(dairy)
bys party_id: egen nondairyfirm=min(dairy)
gen nondairyfoodfirm=(nondairyfirm==0)
drop nondairyfirm

bys party_id year: egen sum_value=sum(value)
bys party_id year: egen sum_quantity=sum(quantity)

preserve
use  "Data/country_codebook/oecd_high_income_country_list.dta", clear
replace oecdhighincome=0 if oecdhighincome==.
duplicates drop origin_id, force
tempfile oecd_list
save `oecd_list', replace
restore

merge m:1 origin_id using `oecd_list'   //mark oecd&high income countries
tab _m
replace oecdhighincome=0 if _m==1 //missing origin id
drop if _m==2
drop _m

bys party_id year: egen sum_oecd_value=sum(value) if oecdhighincome==1
gen oecd_share=sum_oecd_value/sum_value

drop type_id2
collapse (mean) sum_value oecd_share dairyfirm nondairyfoodfirm,by(party_id type_id year)

foreach v of var oecd_share {
	replace `v'=0 if `v'==.
	}
	

ren sum_value value

g active=1

fillin party_id year
replace value=0 if _fillin==1
g lvalue=log(value+0.1)
replace active=0 if _fillin==1

bys party_id: egen avg_size=mean(value)
bys party_id: egen avg_lnsize=mean(lvalue)
bys party_id: egen avg_exp=sum(active)
bys party_id: egen avg_oecd_share=mean(oecd_share) if active==1

collapse (mean)avg_* (firstnm) type_id (mean) dairyfirm nondairyfoodfirm, by(party_id)

***merge type_id2 for ownership(1: state-owned; 2: private; 3: joint; 4: foreign)
merge m:1 type_id using "Data/firm_codebook/company_type_codebook.dta"
tab _m //LOOK AT THIS
drop if _m==2
drop _m

merge m:1 party_id using "Data/inspection_lists/contaminated_firmlist_customs.dta"   //mark contaminated firms
tab _m
gen affected_firms=.
replace affected_firms=1 if _m==3
replace affected_firms=0 if _m==1
drop if _m==2
drop _m

g private=0 if type_id2!=.
replace private=1 if type_id2==2

merge m:1 party_id using "Data/inspection_lists/innocnet_firmlist_customs.dta"
g innocent_firms=.
replace innocent_firms=1 if _m==3
replace innocent_firms=0 if _m==1
drop if _m==2
drop _m


save "Data/Coded data/Customs/summstats_firm.dta",replace

